Enhance RssAgent with the `events_order` option

This should fix #907.

Akinori MUSHA 8 years ago
parent
commit
1d33417cee
3 changed files with 50 additions and 15 deletions
  1. 1 0
      CHANGES.md
  2. 34 15
      app/models/agents/rss_agent.rb
  3. 15 0
      spec/models/agents/rss_agent_spec.rb

+ 1 - 0
CHANGES.md

@@ -1,5 +1,6 @@
1 1
 # Changes
2 2
 
3
+* Jul 30, 2015   - RssAgent can configure the order of events created via `events_order`.
3 4
 * Jul 29, 2015   - WebsiteAgent can configure the order of events created via `events_order`.
4 5
 * Jul 29, 2015   - DataOutputAgent can configure the order of events in the output via `events_order`.
5 6
 * Jul 20, 2015   - Control Links (used by the SchedulerAgent) are correctly exported in Scenarios.

+ 34 - 15
app/models/agents/rss_agent.rb

@@ -9,6 +9,8 @@ module Agents
9 9
     can_dry_run!
10 10
     default_schedule "every_1d"
11 11
 
12
+    DEFAULT_EVENTS_ORDER = [['{{date_published}}', 'time'], ['{{last_updated}}', 'time']]
13
+
12 14
     description do
13 15
       <<-MD
14 16
         This Agent consumes RSS feeds and emits events when they change.
@@ -29,6 +31,12 @@ module Agents
29 31
           * `disable_url_encoding` - Set to `true` to disable url encoding.
30 32
           * `user_agent` - A custom User-Agent name (default: "Faraday v#{Faraday::VERSION}").
31 33
           * `max_events_per_run` - Limit number of events created (items parsed) per run for feed.
34
+
35
+        # Ordering Events
36
+
37
+        #{description_events_order}
38
+
39
+        In this Agent, the default value for `events_order` is `#{DEFAULT_EVENTS_ORDER.to_json}`.
32 40
       MD
33 41
     end
34 42
 
@@ -70,6 +78,11 @@ module Agents
70 78
       end
71 79
 
72 80
       validate_web_request_options!
81
+      validate_events_order
82
+    end
83
+
84
+    def events_order
85
+      super.presence || DEFAULT_EVENTS_ORDER
73 86
     end
74 87
 
75 88
     def check
@@ -77,26 +90,15 @@ module Agents
77 90
         response = faraday.get(url)
78 91
         if response.success?
79 92
           feed = FeedNormalizer::FeedNormalizer.parse(response.body)
80
-          feed.clean! if interpolated['clean'] == 'true'
93
+          feed.clean! if boolify(interpolated['clean'])
81 94
           max_events = (interpolated['max_events_per_run'].presence || 0).to_i
82 95
           created_event_count = 0
83
-          feed.entries.sort_by { |entry| [entry.date_published, entry.last_updated] }.each.with_index do |entry, index|
96
+          sort_events(feed_to_events(feed)).each.with_index do |event, index|
84 97
             break if max_events && max_events > 0 && index >= max_events
85
-            entry_id = get_entry_id(entry)
98
+            entry_id = event.payload[:id]
86 99
             if check_and_track(entry_id)
87 100
               created_event_count += 1
88
-              create_event(payload: {
89
-                id: entry_id,
90
-                date_published: entry.date_published,
91
-                last_updated: entry.last_updated,
92
-                url: entry.url,
93
-                urls: entry.urls,
94
-                description: entry.description,
95
-                content: entry.content,
96
-                title: entry.title,
97
-                authors: entry.authors,
98
-                categories: entry.categories
99
-              })
101
+              create_event(event)
100 102
             end
101 103
           end
102 104
           log "Fetched #{url} and created #{created_event_count} event(s)."
@@ -122,5 +124,22 @@ module Agents
122 124
         true
123 125
       end
124 126
     end
127
+
128
+    def feed_to_events(feed)
129
+      feed.entries.map { |entry|
130
+        Event.new(payload: {
131
+                    id: get_entry_id(entry),
132
+                    date_published: entry.date_published,
133
+                    last_updated: entry.last_updated,
134
+                    url: entry.url,
135
+                    urls: entry.urls,
136
+                    description: entry.description,
137
+                    content: entry.content,
138
+                    title: entry.title,
139
+                    authors: entry.authors,
140
+                    categories: entry.categories
141
+                  })
142
+      }
143
+    end
125 144
   end
126 145
 end

+ 15 - 0
spec/models/agents/rss_agent_spec.rb

@@ -66,6 +66,21 @@ describe Agents::RssAgent do
66 66
       expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"])
67 67
     end
68 68
 
69
+    it "should emit items as events in the order specified in the events_order option" do
70
+      expect {
71
+        agent.options['events_order'] = ['{{title | replace_regex: "^[[:space:]]+", "" }}']
72
+        agent.check
73
+      }.to change { agent.events.count }.by(20)
74
+
75
+      first, *, last = agent.events.last(20)
76
+      expect(first.payload['title'].strip).to eq('upgrade rails and gems')
77
+      expect(first.payload['url']).to eq("https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01")
78
+      expect(first.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01"])
79
+      expect(last.payload['title'].strip).to eq('Dashed line in a diagram indicates propagate_immediately being false.')
80
+      expect(last.payload['url']).to eq("https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535")
81
+      expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535"])
82
+    end
83
+
69 84
     it "should track ids and not re-emit the same item when seen again" do
70 85
       agent.check
71 86
       expect(agent.memory['seen_ids']).to eq(agent.events.map {|e| e.payload['id'] })